####################################
# Code to generate similarity      #
# measure used in:                 #
#                                  #
# "From Economic Competition to    #
# Military Combat: Export          #
# Similarity and International     #
# Conflict" (main text)            #
#                                  #
# J. Tyson Chatagnier and Kerim    #
# Can Kavakli                      #
#                                  #
# Published in the Journal of      #
# Conflict Resolution              #
####################################


library(foreign)
library(matrixStats)
library(Hmisc)

rm(list = ls(all = TRUE))

###################################
# Define strategic resource codes #
# based on coding in Cullen       #
# Goenner's 2010 JPR article.     #
# Will be needed for calculating  #
# strategic similarity.           #
###################################

strat.two <- c("32",
               "33",
               "34",
               "35",
               "51",
               "87",
               "77"
               )

strat.three <- c("287",
                 "681",
                 "682",
                 "683",
                 "684",
                 "685",
                 "686",
                 "687",
                 "689",
                 "522",
                 "523",
                 "764",
                 "286",
                 "524",
                 "792"
                 )
strat.four <- c("7187",
                "7931",
                "9510"
                )

##############################
# Define necessary functions #
##############################

source("country_recode.R")
                        # Program providing the RecodeCountries()
                        # function, used in GetSim() below.

GetSim <- function(file) {
  trade <- read.dta(file)
  trade <- trade[, c(1,
                     3,
                     5,
                     6,
                     9
                     )
                 ]
                          # Keep only year, exporter, category, and value.
  trade.imp <- trade[trade$exporter == "World", ]
  trade.exp <- trade[trade$importer == "World", ]
                        # We want one matrix with only imports and one with
                        # only exports.
  trade.imp$importer <- suppressWarnings(RecodeCountries(trade.imp$importer))
  trade.exp$exporter <- suppressWarnings(RecodeCountries(trade.exp$exporter))
  trade.imp <- trade.imp[!is.na(trade.imp$importer), ]
  trade.exp <- trade.exp[!is.na(trade.exp$exporter), ]
                          # Recode exporter names to COW country codes,
                          # and remove all NAs.
  trade.imp$value <- as.numeric(trade.imp$value)
  trade.exp$value <- as.numeric(trade.exp$value)
                        # Need to make sure these are numeric variables.
  trade.imp <- aggregate(trade.imp$value, 
                         list(trade.imp$importer, 
                              trade.imp$year,
                              trade.imp$sitc4
                              ), 
                         sum
                         )
  trade.exp <- aggregate(trade.exp$value, 
                         list(trade.exp$exporter, 
                              trade.exp$year,
                              trade.exp$sitc4
                              ), 
                         sum
                         )
                          # This aggregates across duplicates.
  
  colnames(trade.imp) <- c("importer",
                           "year",
                           "category",
                           "value"
                           )
  colnames(trade.exp) <- c("exporter",
                           "year",
                           "category",
                           "value"
                           )
  trade.imp <- reshape(trade.imp,
                       timevar   = "category",
                       idvar     = c("importer",
                                     "year"
                                     ),
                       direction = "wide"
                       )
  trade.exp <- reshape(trade.exp,
                       timevar   = "category",
                       idvar     = c("exporter",
                                     "year"
                                     ),
                       direction = "wide"
                       )
                        # Reshape to put trade on the columns.
  trade.imp[is.na(trade.imp)] <- 0
  trade.exp[is.na(trade.exp)] <- 0
                        # Recode NAs to 0.
  tot.imp <- cbind(trade.imp[, 1],
                   rowSums(trade.imp[, - c(1 : 2)])
                   )
  colnames(tot.imp) <- c("country", 
                         "imports"
                         )
  tot.exp <- cbind(trade.exp[, 1], 
                   rowSums(trade.exp[, - c(1 : 2)])
                   )
  colnames(tot.exp) <- c("country", 
                         "exports"
                         )
  tot.trade <- merge(tot.imp,
                     tot.exp,
                     by    = "country",
                     all.x = TRUE,
                     all.y = TRUE
                     )
  tot.trade <- cbind(tot.trade$country,
                     rowSums(tot.trade, 
                             na.rm = TRUE
                             )
                     )
  colnames(tot.trade) <- c("country", 
                           "trade"
                           )  
  raw.goods.imp <- which(as.numeric(substr(colnames(trade.imp)[-c(1 : 3)],
                                           7,
                                           7
                                           ) 
                                    ) < 5 |
                         substr(colnames(trade.imp)[-c(1 : 3)],
                                7,
                                9
                                ) == "667" |
                         substr(colnames(trade.imp)[-c(1 : 3)],
                                7,
                                8
                                ) == "68" |
                         substr(colnames(trade.imp)[-c(1 : 3)],
                                           7,
                                           9
                                    ) == "971"
                         ) + 3
  man.goods.imp <- which(as.numeric(substr(colnames(trade.imp)[-c(1 : 3)],
                                           7,
                                           7
                                           )
                                    ) >= 5 &
                         as.numeric(substr(colnames(trade.imp)[-c(1 : 3)],
                                           7,
                                           7
                                           )
                                    ) < 9 &
                         substr(colnames(trade.imp)[-c(1 : 3)],
                                7,
                                9
                                ) != "667" &
                         substr(colnames(trade.imp)[-c(1 : 3)],
                                7,
                                8
                                ) != "68"
                         ) + 3
  raw.goods.exp <- which(as.numeric(substr(colnames(trade.exp)[-c(1 : 3)],
                                           7,
                                           7
                                           ) 
                                    ) < 5 |
                         substr(colnames(trade.exp)[-c(1 : 3)],
                                7,
                                9
                                ) == "667" |
                         substr(colnames(trade.exp)[-c(1 : 3)],
                                7,
                                8
                                ) == "68" |
                         substr(colnames(trade.exp)[-c(1 : 3)],
                                           7,
                                           9
                                    ) == "971"
                         ) + 3
  man.goods.exp <- which(as.numeric(substr(colnames(trade.exp)[-c(1 : 3)],
                                           7,
                                           7
                                           )
                                    ) >= 5 &
                         as.numeric(substr(colnames(trade.exp)[-c(1 : 3)],
                                           7,
                                           7
                                           )
                                    ) < 9 &
                         substr(colnames(trade.exp)[-c(1 : 3)],
                                7,
                                9
                                ) != "667" &
                         substr(colnames(trade.exp)[-c(1 : 3)],
                                7,
                                8
                                ) != "68"
                         ) + 3
                        # Get columns for raw/manufactured values.
                        # Using the UNCTAD product groupings
  nooil.imp <- which(substr(colnames(trade.imp)[-c(1 : 3)],
                            7,
                            8
                            ) != "33"
                     ) + 3
  nooil.exp <- which(substr(colnames(trade.exp)[-c(1 : 3)],
                            7,
                            8
                            ) != "33"
                     ) + 3
                        # And for non-oil goods.
  oil.imp <- which(substr(colnames(trade.imp)[-c(1 : 3)],
                          7,
                          8
                          ) == "33"
                   ) + 3
  oil.exp <- which(substr(colnames(trade.exp)[-c(1 : 3)],
                          7,
                          8
                          ) == "33"
                   ) + 3
                        # And for oil goods.
  strat.imp <- which(substr(colnames(trade.imp)[-c(1 : 3)],
                            7,
                            8
                            ) %in% strat.two |
                     substr(colnames(trade.imp)[-c(1 : 3)],
                            7,
                            9
                            ) %in% strat.three |
                     substr(colnames(trade.imp)[-c(1 : 3)],
                            7,
                            10
                            ) %in% strat.four
                     ) + 3
  strat.exp <- which(substr(colnames(trade.exp)[-c(1 : 3)],
                            7,
                            8
                            ) %in% strat.two |
                     substr(colnames(trade.exp)[-c(1 : 3)],
                            7,
                            9
                            ) %in% strat.three |
                     substr(colnames(trade.exp)[-c(1 : 3)],
                            7,
                            10
                            ) %in% strat.four
                     ) + 3
                        # For strategic resources
  nostrat.imp <- which(!substr(colnames(trade.imp)[-c(1 : 3)],
                               7,
                               8
                               ) %in% strat.two &
                       !substr(colnames(trade.imp)[-c(1 : 3)],
                              7,
                              9
                              ) %in% strat.three &
                       !substr(colnames(trade.imp)[-c(1 : 3)],
                              7,
                              10
                              ) %in% strat.four
                       ) + 3
  nostrat.exp <- which(!substr(colnames(trade.exp)[-c(1 : 3)],
                               7,
                               8
                               ) %in% strat.two &
                       !substr(colnames(trade.exp)[-c(1 : 3)],
                              7,
                              9
                              ) %in% strat.three &
                       !substr(colnames(trade.exp)[-c(1 : 3)],
                              7,
                              10
                              ) %in% strat.four
                       ) + 3
                        # And non-strategic resources
  total.imp <- rowSums(trade.imp[, -c(1 : 2)])
  total.exp <- rowSums(trade.exp[, -c(1 : 2)])
  total.raw.imp <- rowSums(trade.imp[, raw.goods.imp])
  total.raw.exp <- rowSums(trade.exp[, raw.goods.exp])
  total.man.imp <- rowSums(trade.imp[, man.goods.imp])
  total.man.exp <- rowSums(trade.exp[, man.goods.exp])
  total.nooil.imp <- rowSums(trade.imp[, nooil.imp])
  total.nooil.exp <- rowSums(trade.exp[, nooil.exp])
  total.oil.imp <- rowSums(trade.imp[, oil.imp])
  total.oil.exp <- rowSums(trade.exp[, oil.exp])
  total.strat.imp <- rowSums(trade.imp[, strat.imp])
  total.strat.exp <- rowSums(trade.exp[, strat.exp])
  total.nostrat.imp <- rowSums(trade.imp[, nostrat.imp])
  total.nostrat.exp <- rowSums(trade.exp[, nostrat.exp])
                        # Get total annual trade for each country-year.
  total.raw.imp <- ifelse(total.raw.imp == 0,
                          1,
                          total.raw.imp
                          )
  total.raw.exp <- ifelse(total.raw.exp == 0,
                          1,
                          total.raw.exp
                          )
  total.man.imp <- ifelse(total.man.imp == 0,
                          1,
                          total.man.imp
                          )
  total.man.exp <- ifelse(total.man.exp == 0,
                          1,
                          total.man.exp
                          )
  total.nooil.imp <- ifelse(total.nooil.imp == 0,
                            1,
                            total.nooil.imp
                            )
  total.nooil.exp <- ifelse(total.nooil.exp == 0,
                            1,
                            total.nooil.exp
                            )
  total.oil.imp <- ifelse(total.oil.imp == 0,
                          1,
                          total.oil.imp
                          )
  total.oil.exp <- ifelse(total.oil.exp == 0,
                          1,
                          total.oil.exp
                          )
  total.nostrat.imp <- ifelse(total.nostrat.imp == 0,
                            1,
                            total.nostrat.imp
                            )
  total.nostrat.exp <- ifelse(total.nostrat.exp == 0,
                            1,
                            total.nostrat.exp
                            )
  total.strat.imp <- ifelse(total.strat.imp == 0,
                          1,
                          total.strat.imp
                          )
  total.strat.exp <- ifelse(total.strat.exp == 0,
                          1,
                          total.strat.exp
                          )

                        # If the value is zero, then set this to 1, to get
                        # 0/1 below, rather than 0/0.
  imp.proportions <- trade.imp[, -c(1 : 2)] / total.imp
  exp.proportions <- trade.exp[, -c(1 : 2)] / total.exp
  imp.raw.proportions <- trade.imp[, raw.goods.imp] / total.raw.imp
  exp.raw.proportions <- trade.exp[, raw.goods.exp] / total.raw.exp
  imp.man.proportions <- trade.imp[, man.goods.imp] / total.man.imp
  exp.man.proportions <- trade.exp[, man.goods.exp] / total.man.exp
  imp.nooil.proportions <- trade.imp[, nooil.imp] / total.nooil.imp
  exp.nooil.proportions <- trade.exp[, nooil.exp] / total.nooil.exp
  imp.oil.proportions <- trade.imp[, oil.imp] / total.oil.imp
  exp.oil.proportions <- trade.exp[, oil.exp] / total.oil.exp
  imp.nostrat.proportions <- trade.imp[, nostrat.imp] / total.nostrat.imp
  exp.nostrat.proportions <- trade.exp[, nostrat.exp] / total.nostrat.exp
  imp.strat.proportions <- trade.imp[, strat.imp] / total.strat.imp
  exp.strat.proportions <- trade.exp[, strat.exp] / total.strat.exp
                        # For each commodity, this gives the share that it
                        # occupies of the country's total trade in that 
                        # year. This will sum to 1 along rows.
  imp.mat <- cbind(trade.imp[, 1 : 2],
                   imp.proportions
                   )
  exp.mat <- cbind(trade.exp[, 1 : 2],
                   exp.proportions
                   )
  imp.mat.raw <- cbind(trade.imp[, 1 : 2],
                       imp.raw.proportions
                       )
  exp.mat.raw <- cbind(trade.exp[, 1 : 2],
                       exp.raw.proportions
                       )
  imp.mat.man <- cbind(trade.imp[, 1 : 2],
                       imp.man.proportions
                       )
  exp.mat.man <- cbind(trade.exp[, 1 : 2],
                       exp.man.proportions
                       )
  imp.mat.nooil <- cbind(trade.imp[, 1 : 2],
                         imp.nooil.proportions
                         )
  exp.mat.nooil <- cbind(trade.exp[, 1 : 2],
                         exp.nooil.proportions
                         )
  imp.mat.oil <- cbind(trade.imp[, 1 : 2],
                       imp.oil.proportions
                       )
  exp.mat.oil <- cbind(trade.exp[, 1 : 2],
                       exp.oil.proportions
                       )
  imp.mat.nostrat <- cbind(trade.imp[, 1 : 2],
                         imp.nostrat.proportions
                         )
  exp.mat.nostrat <- cbind(trade.exp[, 1 : 2],
                         exp.nostrat.proportions
                         )
  imp.mat.strat <- cbind(trade.imp[, 1 : 2],
                       imp.strat.proportions
                       )
  exp.mat.strat <- cbind(trade.exp[, 1 : 2],
                       exp.strat.proportions
                       )
                        # Put country year data into the matrix. 
  mat.trade <- merge(imp.mat,
                     exp.mat,
                     by.x = c("importer",
                              "year"
                              ),
                     by.y = c("exporter",
                              "year"
                              )
                     )
  mat.raw <- merge(imp.mat.raw,
                   exp.mat.raw,
                   by.x = c("importer",
                            "year"
                            ),
                   by.y = c("exporter",
                            "year"
                            )
                   )
  mat.man <- merge(imp.mat.man,
                   exp.mat.man,
                   by.x = c("importer",
                            "year"
                            ),
                   by.y = c("exporter",
                            "year"
                            )
                   )
  mat.nooil <- merge(imp.mat.nooil,
                     exp.mat.nooil,
                     by.x = c("importer",
                              "year"
                              ),
                     by.y = c("exporter",
                              "year"
                              )
                     )
  mat.oil <- merge(imp.mat.oil,
                   exp.mat.oil,
                   by.x = c("importer",
                            "year"
                            ),
                   by.y = c("exporter",
                            "year"
                            )
                   )
  mat.nostrat <- merge(imp.mat.nostrat,
                     exp.mat.nostrat,
                     by.x = c("importer",
                              "year"
                              ),
                     by.y = c("exporter",
                              "year"
                              )
                     )
  mat.strat <- merge(imp.mat.strat,
                   exp.mat.strat,
                   by.x = c("importer",
                            "year"
                            ),
                   by.y = c("exporter",
                            "year"
                            )
                   )

  dyads <- expand.grid(unique(mat.trade$importer),
                       unique(mat.trade$importer)
                       )
  dyads[, 3] <- trade[1, 1]
                        # Include a column of year values. This should
                        # be the same for all rows of the trade matrix,
                        # so just take the first observation.
  mat.cor <- rcorr(t(mat.trade[, -c(1 : 2)]))$r
  cor.exp <- rcorr(t(mat.trade[, (ncol(imp.mat) + 1) : 
                                  ncol(mat.trade)
                               ]))$r
  cor.raw <- rcorr(t(mat.raw[, -c(1 : 2)]))$r
  cor.rawexp <- rcorr(t(mat.raw[, (ncol(imp.mat.raw) + 1) : 
                                   ncol(mat.raw)
                                ]))$r
  cor.man <- rcorr(t(mat.man[, -c(1 : 2)]))$r
  cor.manexp <- rcorr(t(mat.man[, (ncol(imp.mat.man) + 1) : 
                                   ncol(mat.man)
                                ]))$r
  cor.nooil <- rcorr(t(mat.nooil[, -c(1 : 2)]))$r
  cor.nooilexp <- rcorr(t(mat.nooil[, (ncol(imp.mat.nooil) + 1) : 
                                       ncol(mat.nooil)
                                    ]))$r
  cor.oil <- rcorr(t(mat.oil[, -c(1 : 2)]))$r
  cor.oilexp <- rcorr(t(mat.oil[, (ncol(imp.mat.oil) + 1) : 
                                   ncol(mat.oil)
                                   ]))$r
  cor.nostrat <- rcorr(t(mat.nostrat[, -c(1 : 2)]))$r
  cor.nostratexp <- rcorr(t(mat.nostrat[, (ncol(imp.mat.nostrat) + 1) : 
                                       ncol(mat.nostrat)
                                    ]))$r
  cor.strat <- rcorr(t(mat.strat[, -c(1 : 2)]))$r
  cor.stratexp <- rcorr(t(mat.strat[, (ncol(imp.mat.strat) + 1) : 
                                   ncol(mat.strat)
                                   ]))$r
                        # Get a matrix of correlations for every pair
                        # of rows in mat.trade
  dyads[, 4] <- as.vector(mat.cor)
  dyads[, 5] <- as.vector(cor.exp)
  dyads[, 6] <- as.vector(cor.raw)
  dyads[, 7] <- as.vector(cor.rawexp)
  dyads[, 8] <- as.vector(cor.man)
  dyads[, 9] <- as.vector(cor.manexp)  
  dyads[, 10] <- as.vector(cor.nooil)
  dyads[, 11] <- as.vector(cor.nooilexp)
  dyads[, 12] <- as.vector(cor.oil)
  dyads[, 13] <- as.vector(cor.oilexp)
  dyads[, 14] <- as.vector(cor.nostrat)
  dyads[, 15] <- as.vector(cor.nostratexp)
  dyads[, 16] <- as.vector(cor.strat)
  dyads[, 17] <- as.vector(cor.stratexp)
                        # Bind them onto the dyads matrix

  colnames(dyads) <- c("country1", 
                       "country2", 
                       "year",
                       "correlation",
                       "exports",
                       "raw",
                       "rawexp",
                       "man",
                       "manexp",
                       "nooil",
                       "nooilexp",
                       "oil",
                       "oilexp",
                       "nostrat",
                       "nostratexp",
                       "strat",
                       "stratexp"
                       )
  dyads <- dyads[with(dyads, order(country1, country2)), ]
  sim.df <- data.frame(country1        = dyads$country1,
                       country2        = dyads$country2,
                       year            = dyads$year,
                       correlation     = dyads$correlation,
                       exports         = dyads$exports,
                       raw             = dyads$raw,
                       rawexports      = dyads$rawexp,
                       man             = dyads$man,
                       manexports      = dyads$manexp,
                       nonoil          = dyads$nooil,
                       nonoilexports   = dyads$nooilexp,
                       oil             = dyads$oil,
                       oilexports      = dyads$oilexp,
                       nonstrat        = dyads$nostrat,
                       nonstratexports = dyads$nostratexp,
                       strat           = dyads$strat,
                       stratexports    = dyads$stratexp
                       )
  return(sim.df)
}


UnzipFiles <- function(path) {
  files.zip <- list.files(path       = path, 
                          pattern    = ".zip",
                          full.names = TRUE
                          )
  for (i in files.zip) {
    unzip(i)
  }
}

SimYears <- function(path) {
  UnzipFiles(path)
  files.dta <- list.files(path       = path, 
                          pattern    = ".dta",
                          full.names = TRUE
                          )
  n.yrs <- length(files.dta)
  sim.list <- vector("list",
                     length = n.yrs
                     )
  for (i in 1 : n.yrs) {
    sim.list[[i]] <- GetSim(files.dta[i])
  }
  sim.df <- do.call(rbind,
                    sim.list
                    )
  return(sim.df)
}

#######################
# Run functions above #
# on directory where  #
# data files are      #
# located.            #
#######################

df.out <- SimYears(path)
                        # Change 'path' to the path into which the
                        # individual trade .zip files were extracted.

write.dta(df.out,
          "filename.dta"
          )
                        # Insert path and filename to which the data
                        # should be written here.
